# Load shared environment: paths (COMMUNITY_OUTPUT_DIR etc.) and helper
# functions (filename, read_country_boundaries, read_resolve, test_value_wilcox)
source('../env.R')
# Per-city community assembly metrics (actual + normalised MNTD / FDiv values)
community_data = read_csv(filename(COMMUNITY_OUTPUT_DIR, 'community_assembly_metrics_using_relative_abundance.csv'))
Rows: 336 Columns: 43── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
dbl (43): mntd_normalised, mntd_actual, mntd_min, mntd_max, mntd_mean, mntd_sd, fdiv_normalised, fdiv_actual, fdiv_min, fdiv_max, fdiv_mean, fdiv_sd, mass_fdi...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(community_data)
colnames(community_data)
[1] "mntd_normalised" "mntd_actual" "mntd_min" "mntd_max"
[5] "mntd_mean" "mntd_sd" "fdiv_normalised" "fdiv_actual"
[9] "fdiv_min" "fdiv_max" "fdiv_mean" "fdiv_sd"
[13] "mass_fdiv_normalised" "mass_fdiv_actual" "mass_fdiv_min" "mass_fdiv_max"
[17] "mass_fdiv_mean" "mass_fdiv_sd" "locomotory_trait_fdiv_normalised" "locomotory_trait_fdiv_actual"
[21] "locomotory_trait_fdiv_min" "locomotory_trait_fdiv_max" "locomotory_trait_fdiv_mean" "locomotory_trait_fdiv_sd"
[25] "trophic_trait_fdiv_normalised" "trophic_trait_fdiv_actual" "trophic_trait_fdiv_min" "trophic_trait_fdiv_max"
[29] "trophic_trait_fdiv_mean" "trophic_trait_fdiv_sd" "gape_width_fdiv_normalised" "gape_width_fdiv_actual"
[33] "gape_width_fdiv_min" "gape_width_fdiv_max" "gape_width_fdiv_mean" "gape_width_fdiv_sd"
[37] "handwing_index_fdiv_normalised" "handwing_index_fdiv_actual" "handwing_index_fdiv_min" "handwing_index_fdiv_max"
[41] "handwing_index_fdiv_mean" "handwing_index_fdiv_sd" "city_id"
# Join on realms: attach each city's biogeographic realm to its metrics
city_to_realm = read_csv(filename(CITY_DATA_OUTPUT_DIR, 'realms.csv'))
Rows: 337 Columns: 2── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): core_realm
dbl (1): city_id
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
community_data_with_realm = left_join(community_data, city_to_realm)
Joining with `by = join_by(city_id)`
# Cities as points: centroids of the city polygons, joined to the metrics
city_points = st_centroid(read_sf(filename(CITY_DATA_OUTPUT_DIR, 'city_selection.shp'))) %>% left_join(community_data_with_realm)
Warning: st_centroid assumes attributes are constant over geometriesWarning: st_centroid does not give correct centroids for longitude/latitude dataJoining with `by = join_by(city_id)`
city_points_coords = st_coordinates(city_points)
# st_coordinates() returns columns in X (longitude), Y (latitude) order;
# previously the two assignments were swapped, putting X into `latitude`.
city_points$longitude = city_points_coords[,1]
city_points$latitude = city_points_coords[,2]
world_map = read_country_boundaries()
Reading layer `WB_countries_Admin0_10m' from data source `/Users/james/Dropbox/PhD/WorldBank_countries_Admin0_10m/WB_countries_Admin0_10m.shp' using driver `ESRI Shapefile'
Simple feature collection with 251 features and 52 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -180 ymin: -59.47275 xmax: 180 ymax: 83.6341
Geodetic CRS: WGS 84
Warning: st_simplify does not correctly simplify longitude/latitude data, dTolerance needs to be in decimal degrees
# Load community data (one row per city x species), i.e. the long-format version
communities = read_csv(filename(COMMUNITY_OUTPUT_DIR, 'communities_for_analysis.csv'))
Rows: 2427 Columns: 12── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (5): city_name, jetz_species_name, seasonal, presence, origin
dbl (4): city_id, distance_to_northern_edge_km, distance_to_southern_edge_km, relative_abundance_proxy
lgl (3): present_urban_high, present_urban_med, present_urban_low
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
communities
# Per-city pool sizes: all species in the regional pool vs those recorded in
# urban habitat (relative_abundance_proxy > 0).
# NOTE(review): an NA relative_abundance_proxy would make urban_pool_size NA —
# assumed to be absent upstream; confirm.
community_summary = communities %>% group_by(city_id) %>% summarise(regional_pool_size = n(), urban_pool_size = sum(relative_abundance_proxy > 0))
community_summary
# Load trait data (per-species morphological traits, Jetz taxonomy)
traits = read_csv(filename(TAXONOMY_OUTPUT_DIR, 'traits_jetz.csv'))
Rows: 304 Columns: 6── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): jetz_species_name
dbl (5): gape_width, trophic_trait, locomotory_trait, mass, handwing_index
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(traits)
# Load realm geometries (RESOLVE polygons) for mapping
resolve = read_resolve()
head(resolve)
Simple feature collection with 6 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -162.1547 ymin: -69.55876 xmax: 158.6167 ymax: 61.53428
Geodetic CRS: WGS 84
test_required_values = function(name, df) {
  # Run the Wilcoxon test on each normalised assembly metric and print one
  # labelled result per line, followed by the number of cities tested.
  results = c(
    test_value_wilcox(paste(name, 'MNTD'), df$mntd_normalised),
    test_value_wilcox(paste(name, 'Beak Gape FDiv'), df$gape_width_fdiv_normalised),
    test_value_wilcox(paste(name, 'HWI FDiv'), df$handwing_index_fdiv_normalised),
    test_value_wilcox(paste(name, 'Mass FDiv'), df$mass_fdiv_normalised),
    nrow(df)
  )
  cat(paste(results, collapse = "\n"))
}
test_required_values('Global', community_data_with_realm)
Global MNTD median 0.49
Global Beak Gape FDiv median 0.59 ***
Global HWI FDiv median 0.66 ***
Global Mass FDiv median 0.65 ***
336
unique(community_data_with_realm$core_realm)
[1] "Oceania" "Nearctic" "Neotropic" "Palearctic" "Afrotropic" "Indomalayan" "Australasia"
test_required_values('Nearctic', community_data_with_realm[community_data_with_realm$core_realm == 'Nearctic',])
Warning: cannot compute exact p-value with tiesWarning: cannot compute exact p-value with ties
Nearctic MNTD median 0.65
Nearctic Beak Gape FDiv median 0.63
Nearctic HWI FDiv median 0.23 **
Nearctic Mass FDiv median 0.48
66
test_required_values('Neotropic', community_data_with_realm[community_data_with_realm$core_realm == 'Neotropic',])
Neotropic MNTD median 0.5
Neotropic Beak Gape FDiv median 0.47
Neotropic HWI FDiv median 0.52
Neotropic Mass FDiv median 0.66 ***
64
test_required_values('Palearctic', community_data_with_realm[community_data_with_realm$core_realm == 'Palearctic',])
Palearctic MNTD median 0.59 *
Palearctic Beak Gape FDiv median 0.93 ***
Palearctic HWI FDiv median 0.4
Palearctic Mass FDiv median 0.55
74
test_required_values('Afrotropic', community_data_with_realm[community_data_with_realm$core_realm == 'Afrotropic',])
Afrotropic MNTD median 0.16 *
Afrotropic Beak Gape FDiv median 0.42
Afrotropic HWI FDiv median 0.54
Afrotropic Mass FDiv median 0.34
9
test_required_values('Indomalayan', community_data_with_realm[community_data_with_realm$core_realm == 'Indomalayan',])
Indomalayan MNTD median 0.44 ***
Indomalayan Beak Gape FDiv median 0.46
Indomalayan HWI FDiv median 0.88 ***
Indomalayan Mass FDiv median 0.8 ***
116
test_required_values('Australasia', community_data_with_realm[community_data_with_realm$core_realm == 'Australasia',])
Australasia MNTD median 0.53
Australasia Beak Gape FDiv median 0.46
Australasia HWI FDiv median 0.78
Australasia Mass FDiv median 0.55
6
# Distinct name-prefix / realm combinations across all communities.
# NOTE(review): gsub("_.*$", "", jetz_species_name) keeps the text before the
# first underscore; for Genus_species-style names that is the GENUS, not the
# family — confirm the intended grouping matches the column name used here.
communities %>%
left_join(city_to_realm) %>%
mutate(family = gsub( "_.*$", "", jetz_species_name)) %>%
dplyr::select(family, core_realm) %>%
distinct() %>%
arrange(core_realm)
Joining with `by = join_by(city_id)`
communities = read_csv(filename(COMMUNITY_OUTPUT_DIR, 'communities_for_analysis.csv'))
Rows: 2427 Columns: 12── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (5): city_name, jetz_species_name, seasonal, presence, origin
dbl (4): city_id, distance_to_northern_edge_km, distance_to_southern_edge_km, relative_abundance_proxy
lgl (3): present_urban_high, present_urban_med, present_urban_low
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Count total and introduced species per city in a single pass. Cities with
# no 'Introduced' rows get an explicit zero (the previous version achieved
# this via filter + left_join + replace_na, which also triggered a join
# message and an extra pass over the data).
city_introduced_species = communities %>%
  group_by(city_id) %>%
  summarise(
    number_of_species = n(),
    number_of_introduced_species = sum(origin == 'Introduced', na.rm = TRUE)
  )
Joining with `by = join_by(city_id)`
community_data_with_introductions = left_join(community_data, city_introduced_species)
Joining with `by = join_by(city_id)`
community_data_with_introductions$has_introduced_species = community_data_with_introductions$number_of_introduced_species > 0
community_data_with_introductions
community_data_with_introductions[,c('mntd_normalised', 'has_introduced_species')]
# Summary statistics for each normalised metric, split by whether the city
# has any introduced species. TRUE is spelled out instead of T: T/F are
# ordinary reassignable variables, not reserved words.
community_data_with_introductions %>% group_by(has_introduced_species) %>% summarise(
  total_cities = n(),
  mean_mntd_normalised = mean(mntd_normalised, na.rm = TRUE),
  median_mntd_normalised = median(mntd_normalised, na.rm = TRUE),
  sd_mntd_normalised = sd(mntd_normalised, na.rm = TRUE),
  mean_mass_fdiv_normalised = mean(mass_fdiv_normalised, na.rm = TRUE),
  median_mass_fdiv_normalised = median(mass_fdiv_normalised, na.rm = TRUE),
  sd_mass_fdiv_normalised = sd(mass_fdiv_normalised, na.rm = TRUE),
  mean_gape_width_fdiv_normalised = mean(gape_width_fdiv_normalised, na.rm = TRUE),
  median_gape_width_fdiv_normalised = median(gape_width_fdiv_normalised, na.rm = TRUE),
  sd_gape_width_fdiv_normalised = sd(gape_width_fdiv_normalised, na.rm = TRUE),
  mean_handwing_index_fdiv_normalised = mean(handwing_index_fdiv_normalised, na.rm = TRUE),
  median_handwing_index_fdiv_normalised = median(handwing_index_fdiv_normalised, na.rm = TRUE),
  sd_handwing_index_fdiv_normalised = sd(handwing_index_fdiv_normalised, na.rm = TRUE)
)
ggplot(community_data_with_introductions, aes(x = has_introduced_species, y = mntd_normalised)) + geom_boxplot()
wilcox.test(mntd_normalised ~ has_introduced_species, community_data_with_introductions, na.action = 'na.omit')
Wilcoxon rank sum test with continuity correction
data: mntd_normalised by has_introduced_species
W = 10512, p-value = 0.02269
alternative hypothesis: true location shift is not equal to 0
There is a significant difference between the response of cities with introduced species (0.53±0.27) and those without (0.47±0.19) (p-value = 0.02).
ggplot(community_data_with_introductions, aes(x = has_introduced_species, y = mass_fdiv_normalised)) + geom_boxplot()
wilcox.test(mass_fdiv_normalised ~ has_introduced_species, community_data_with_introductions, na.action = 'na.omit')
Wilcoxon rank sum test with continuity correction
data: mass_fdiv_normalised by has_introduced_species
W = 16441, p-value = 0.0000002359
alternative hypothesis: true location shift is not equal to 0
There is a significant difference between the response of cities with introduced species (0.57±0.27) and those without (0.73±0.24) (p < 0.0001)
ggplot(community_data_with_introductions, aes(x = has_introduced_species, y = gape_width_fdiv_normalised)) + geom_boxplot()
wilcox.test(gape_width_fdiv_normalised ~ has_introduced_species, community_data_with_introductions, na.action = 'na.omit')
Wilcoxon rank sum test with continuity correction
data: gape_width_fdiv_normalised by has_introduced_species
W = 10658, p-value = 0.1538
alternative hypothesis: true location shift is not equal to 0
There is NOT a significant difference between the response of cities with introduced species (0.61±0.30) and those without (0.56±0.27)
ggplot(community_data_with_introductions, aes(x = has_introduced_species, y = handwing_index_fdiv_normalised)) + geom_boxplot()
wilcox.test(handwing_index_fdiv_normalised ~ has_introduced_species, community_data_with_introductions, na.action = 'na.omit')
Wilcoxon rank sum test with continuity correction
data: handwing_index_fdiv_normalised by has_introduced_species
W = 18856, p-value < 0.00000000000000022
alternative hypothesis: true location shift is not equal to 0
There is a significant difference between the response of cities with introduced species (0.49±0.30) and those without (0.79±0.21) (p < 0.0001)
geography = read_csv(filename(CITY_DATA_OUTPUT_DIR, 'geography.csv'))
Rows: 342 Columns: 26── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
dbl (26): city_id, city_avg_ndvi, city_avg_elevation, city_avg_temp, city_avg_min_monthly_temp, city_avg_max_monthly_temp, city_avg_monthly_temp, city_avg_rai...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
names(geography)
[1] "city_id" "city_avg_ndvi" "city_avg_elevation" "city_avg_temp"
[5] "city_avg_min_monthly_temp" "city_avg_max_monthly_temp" "city_avg_monthly_temp" "city_avg_rainfall"
[9] "city_avg_max_monthly_rainfall" "city_avg_min_monthly_rainfall" "city_avg_soil_moisture" "city_max_elev"
[13] "city_min_elev" "city_elev_range" "region_20km_avg_ndvi" "region_20km_avg_elevation"
[17] "region_20km_avg_soil_moisture" "region_20km_max_elev" "region_20km_min_elev" "region_20km_elev_range"
[21] "region_50km_avg_ndvi" "region_50km_avg_elevation" "region_50km_avg_soil_moisture" "region_50km_max_elev"
[25] "region_50km_min_elev" "region_50km_elev_range"
# Assemble a single analysis table: the four normalised responses plus realm,
# city coordinates, the introduced-species flag and all geography predictors,
# joined on city_id. (city_points is an sf object, so its geometry column
# comes along with the join.)
analysis_data = community_data_with_realm[,c('city_id', 'mntd_normalised', 'mass_fdiv_normalised', 'gape_width_fdiv_normalised', 'handwing_index_fdiv_normalised', 'core_realm')] %>%
left_join(city_points[,c('city_id', 'latitude', 'longitude')]) %>%
left_join(community_data_with_introductions[,c('city_id', 'has_introduced_species')]) %>%
left_join(geography)
Joining with `by = join_by(city_id)`Joining with `by = join_by(city_id)`Joining with `by = join_by(city_id)`
analysis_data$abs_latitude = abs(analysis_data$latitude)
# Palearctic listed first so it becomes the factor reference level
analysis_data$core_realm = factor(analysis_data$core_realm, levels = c('Palearctic', 'Nearctic', 'Neotropic', 'Afrotropic', 'Indomalayan', 'Australasia', 'Oceania'))
# 'levels' spelled out in full — the original 'level =' only worked through
# R's partial argument matching.
analysis_data$has_introduced_species = factor(analysis_data$has_introduced_species, levels = c('TRUE', 'FALSE'), labels = c('Introduced species', 'No introduced species'))
model_data = function(df, dependant_var) {
  # Subset `df` to the chosen response column plus every candidate predictor,
  # in a fixed order (response first).
  predictor_cols = c(
    'core_realm', 'abs_latitude', 'latitude', 'longitude', 'has_introduced_species',
    'city_avg_ndvi', 'city_avg_elevation', 'city_avg_temp', 'city_avg_min_monthly_temp',
    'city_avg_max_monthly_temp', 'city_avg_monthly_temp', 'city_avg_rainfall',
    'city_avg_max_monthly_rainfall', 'city_avg_min_monthly_rainfall',
    'city_avg_soil_moisture', 'city_max_elev', 'city_min_elev', 'city_elev_range',
    'region_20km_avg_ndvi', 'region_20km_avg_elevation', 'region_20km_avg_soil_moisture',
    'region_20km_max_elev', 'region_20km_min_elev', 'region_20km_elev_range',
    'region_50km_avg_ndvi', 'region_50km_avg_elevation', 'region_50km_avg_soil_moisture',
    'region_50km_max_elev', 'region_50km_min_elev', 'region_50km_elev_range'
  )
  df[, c(dependant_var, predictor_cols)]
}
model_data(analysis_data, 'mntd_normalised')
names(analysis_data)
[1] "city_id" "mntd_normalised" "mass_fdiv_normalised" "gape_width_fdiv_normalised"
[5] "handwing_index_fdiv_normalised" "core_realm" "latitude" "longitude"
[9] "geometry" "has_introduced_species" "city_avg_ndvi" "city_avg_elevation"
[13] "city_avg_temp" "city_avg_min_monthly_temp" "city_avg_max_monthly_temp" "city_avg_monthly_temp"
[17] "city_avg_rainfall" "city_avg_max_monthly_rainfall" "city_avg_min_monthly_rainfall" "city_avg_soil_moisture"
[21] "city_max_elev" "city_min_elev" "city_elev_range" "region_20km_avg_ndvi"
[25] "region_20km_avg_elevation" "region_20km_avg_soil_moisture" "region_20km_max_elev" "region_20km_min_elev"
[29] "region_20km_elev_range" "region_50km_avg_ndvi" "region_50km_avg_elevation" "region_50km_avg_soil_moisture"
[33] "region_50km_max_elev" "region_50km_min_elev" "region_50km_elev_range" "abs_latitude"
# Master ordering of model predictors, used by type_labels() to order plot
# axes. Must contain a dummy-variable name for every realm level that can
# appear in averaged-model coefficients — 'core_realmOceania' was missing,
# even though Oceania cities exist and a label for it is defined below.
all_explanatories = c(
  'abs_latitude', 'latitude', 'longitude',
  'has_introduced_species',
  'city_avg_ndvi', 'city_avg_elevation', 'city_avg_temp', 'city_avg_min_monthly_temp', 'city_avg_max_monthly_temp',
  'city_avg_monthly_temp', 'city_avg_rainfall', 'city_avg_max_monthly_rainfall', 'city_avg_min_monthly_rainfall',
  'city_avg_soil_moisture', 'city_max_elev', 'city_min_elev', 'city_elev_range',
  'region_20km_avg_ndvi', 'region_20km_avg_elevation', 'region_20km_avg_soil_moisture', 'region_20km_max_elev',
  'region_20km_min_elev', 'region_20km_elev_range',
  'region_50km_avg_ndvi', 'region_50km_avg_elevation', 'region_50km_avg_soil_moisture', 'region_50km_max_elev',
  'region_50km_min_elev', 'region_50km_elev_range',
  'core_realmAfrotropic', 'core_realmAustralasia', 'core_realmIndomalayan', 'core_realmNearctic', 'core_realmNeotropic', 'core_realmOceania', 'core_realmPalearctic')
type_labels = function(p) {
  # Add a `type` category column to a coefficient table and turn `explanatory`
  # into a factor ordered to match the master list (absent predictors dropped).
  explanatory_levels = all_explanatories[all_explanatories %in% p$explanatory]
  p$explanatory <- factor(p$explanatory, levels = explanatory_levels)
  # Default bucket is 'Realm' (covers all core_realm* dummies); everything
  # matched below is reassigned.
  p$type <- 'Realm'
  # Previously has_introduced_species fell through to the 'Realm' default and
  # was mislabeled in plot legends.
  p$type[p$explanatory == 'has_introduced_species'] <- 'Introductions'
  p$type[p$explanatory %in% c('city_avg_ndvi', 'city_avg_elevation', 'city_avg_temp', 'city_avg_min_monthly_temp', 'city_avg_max_monthly_temp',
                              'city_avg_monthly_temp', 'city_avg_rainfall', 'city_avg_max_monthly_rainfall', 'city_avg_min_monthly_rainfall',
                              'city_avg_soil_moisture', 'city_max_elev', 'city_min_elev', 'city_elev_range')] <- 'City geography'
  p$type[p$explanatory %in% c('region_50km_avg_ndvi', 'region_50km_avg_elevation', 'region_50km_avg_soil_moisture', 'region_50km_max_elev',
                              'region_50km_min_elev', 'region_50km_elev_range')] <- 'Regional (50km) geography'
  p$type[p$explanatory %in% c('region_20km_avg_ndvi', 'region_20km_avg_elevation', 'region_20km_avg_soil_moisture', 'region_20km_max_elev',
                              'region_20km_min_elev', 'region_20km_elev_range')] <- 'Regional (20km) geography'
  p$type[p$explanatory %in% c('abs_latitude', 'latitude', 'longitude')] <- 'Spatial'
  p
}
# Human-readable plot labels keyed by predictor / dummy-variable name.
# Geography labels repeat across the city / 20km / 50km scales on purpose:
# the predictor type distinguishes them in plots.
explanatory_labels = c(
  'has_introduced_species'='Has introduced species',
  'city_avg_ndvi'='Average NDVI',
  'city_avg_elevation'='Average elevation',
  'city_avg_temp'='Average temperature',
  'city_avg_min_monthly_temp'='Average minimum monthly temperature',
  'city_avg_max_monthly_temp'='Average maximum monthly temperature',
  'city_avg_monthly_temp'='Average monthly temperature',
  'city_avg_rainfall'='Average rainfall',
  'city_avg_max_monthly_rainfall'='Average maximum monthly rainfall',
  'city_avg_min_monthly_rainfall'='Average minimum monthly rainfall',
  'city_avg_soil_moisture'='Average soil moisture',
  'city_max_elev'='Maximum elevation',
  'city_min_elev'='Minimum elevation',
  'city_elev_range'='Elevation range',
  'region_20km_avg_ndvi'='Average NDVI',
  'region_20km_avg_elevation'='Average elevation',
  'region_20km_avg_soil_moisture'='Average soil moisture',
  'region_20km_max_elev'='Maximum elevation',
  'region_20km_min_elev'='Minimum elevation',
  'region_20km_elev_range'='Elevation range',
  'region_50km_avg_ndvi'='Average NDVI',
  'region_50km_avg_elevation'='Average elevation',
  'region_50km_avg_soil_moisture'='Average soil moisture',
  'region_50km_max_elev'='Maximum elevation',
  'region_50km_min_elev'='Minimum elevation',
  'region_50km_elev_range'='Elevation range',
  'abs_latitude' = 'Absolute latitude',
  'latitude' = 'Latitude',
  'longitude' = 'Longitude',
  # Typos fixed: 'Austaliasian' -> 'Australasian', 'Oceanical' -> 'Oceanian'
  'core_realmAfrotropic' = 'Afrotropical',
  'core_realmAustralasia' = 'Australasian',
  'core_realmIndomalayan' = 'Indomalayan',
  'core_realmNearctic' = 'Nearctic',
  'core_realmNeotropic' = 'Neotropical',
  'core_realmPalearctic' = 'Palearctic',
  'core_realmOceania' = 'Oceanian')
# Histogram of a normalised metric, one dodged bar group per realm.
# `name` : plot title; `gg` : a ggplot object with the metric as x aesthetic.
# Reference lines: 0.5 = midpoint; 0 and 1 = the normalisation bounds.
geom_normalised_histogram = function(name, gg, legend.position = "bottom") {
  gg +
    geom_histogram(aes(fill = core_realm), binwidth = 0.1, position = "dodge") +
    # `linewidth` replaces `size` for line-based geoms (deprecated in
    # ggplot2 3.4.0 — this was producing the deprecation warning).
    geom_vline(aes(xintercept = 0.5), color = "#000000", linewidth = 0.4) +
    geom_vline(aes(xintercept = 0), color = "#000000", linewidth = 0.2, linetype = "dashed") +
    geom_vline(aes(xintercept = 1), color = "#000000", linewidth = 0.2, linetype = "dashed") +
    ylab("Number of cities") + xlab("Normalised Response") + ylim(c(0, 70)) +
    labs(title = name, fill = 'Realm') +
    theme_bw() +
    theme(legend.position=legend.position)
}
# World map with a city-level sf layer coloured by a normalised metric.
# `map_sf` : a geom_sf layer of city points; `title` : plot title.
# Fixes: `title` was accepted but never used, and the plot was bound to a
# stale local name instead of being returned directly.
geom_map = function(map_sf, title) {
  ggplot() +
    geom_sf(data = world_map, aes(geometry = geometry)) +
    map_sf +
    normalised_colours_scale +
    labs(title = title, colour = 'Normalised\nResponse') +
    theme_bw() +
    theme(legend.position="bottom")
}
# Taken from MuMIN package
# https://rdrr.io/cran/MuMIn/src/R/averaging.R
# https://rdrr.io/cran/MuMIn/src/R/model.avg.R
# Average a coefficient array across candidate models (vendored MuMIn
# internal — kept byte-identical to the upstream source cited above).
# `cfarr`       : 3-D array [model, statistic (est / se / df), coefficient]
# `weight`      : per-model weights (normalised to sum to 1 below)
# `revised.var` : passed through to par.avg()
# `full`        : TRUE = "full" average (missing coefficients count as zero);
#                 FALSE = "subset" average over models containing the term
# `alpha`       : significance level passed to par.avg() for the CIs
# Returns a [coefficient x 5] matrix: Estimate, SE, Adjusted SE, CI bounds.
.coefarr.avg <-
function(cfarr, weight, revised.var, full, alpha) {
  weight <- weight / sum(weight)
  nCoef <- dim(cfarr)[3L]
  if(full) {
    # Full average: a coefficient absent from a model (estimate AND se both
    # NA) is treated as 0 in that model, with infinite df.
    nas <- is.na(cfarr[, 1L, ]) & is.na(cfarr[, 2L, ])
    cfarr[, 1L, ][nas] <- cfarr[, 2L, ][nas] <- 0
    #cfarr[, 1L:2L, ][is.na(cfarr[, 1L:2L, ])] <- 0
    if(!all(is.na(cfarr[, 3L, ])))
      cfarr[ ,3L, ][is.na(cfarr[ , 3L, ])] <- Inf
  }
  avgcoef <- array(dim = c(nCoef, 5L),
    dimnames = list(dimnames(cfarr)[[3L]], c("Estimate",
      "Std. Error", "Adjusted SE", "Lower CI", "Upper CI")))
  # Combine each coefficient across models with MuMIn's par.avg()
  for(i in seq_len(nCoef))
    avgcoef[i, ] <- par.avg(cfarr[, 1L, i], cfarr[, 2L, i], weight,
      df = cfarr[, 3L, i], alpha = alpha, revised.var = revised.var)
  # NaN can arise from 0/0 in the weighted combination; report as NA
  avgcoef[is.nan(avgcoef)] <- NA
  return(avgcoef)
}
.makecoefmat <- function(cf) {
  # Append z statistics and two-sided p values to an averaged-coefficient
  # matrix (columns: estimate, SE, adjusted SE). When the adjusted SE column
  # is entirely NA, fall back to the plain SE and drop that column.
  have_adjusted_se <- !all(is.na(cf[, 3L]))
  se_col <- if (have_adjusted_se) 3L else 2L
  z <- abs(cf[, 1L] / cf[, se_col])
  pval <- 2 * pnorm(z, lower.tail = FALSE)
  kept <- cf[, seq_len(se_col), drop = FALSE]
  cbind(kept, `z value` = z, `Pr(>|z|)` = zapsmall(pval))
}
# Generate model selections using lm, dredge, and model.avg.
# (Comment fixed: the original header said lmer / random effects, but the
# code fits a plain fixed-effects lm.)
# `formula` : a two-sided linear formula describing the model terms
# `data` : the data frame containing the variables from the formula
# `aic_delta` : the AIC delta to use for selecting models in model average
model_average <- function(formula, data, aic_delta = 20) {
  # Fit the global model; dredge() then fits and ranks all term subsets
  model <- lm(
    formula,
    data=data
  )
  dredge_result <- dredge(model)
  # Average coefficients over every model within `aic_delta` of the best AIC
  summary(model.avg(dredge_result, subset = delta < aic_delta))
}
# Create a summary data frame containing the selected variables from a model
# `model_sum` : The model summary output from `model_average`
# Returns one row per (coefficient, averaging flavour): columns explanatory,
# estimate, error, p, model ('full' or 'subset'), intercept excluded.
# Fixes: removed the no-op `.column_name` wrapper and factored the duplicated
# full/subset extraction into a single helper.
model_summary <- function(model_sum) {
  # Column 5 of the model-selection table holds the model weights
  weight <- model_sum$msTable[, 5L]
  revised_var <- attr(model_sum, "revised.var")
  # Average the coefficient array (full = TRUE/FALSE flavour), then keep
  # estimate, SE and p value for every term except the intercept.
  .extract <- function(full, label) {
    cf <- as.data.frame(.makecoefmat(.coefarr.avg(
      model_sum$coefArray, weight, revised_var, full, 0.05)))
    cf <- cf[-1L, c(1L, 2L, 5L)]
    names(cf) <- c("estimate", "error", "p")
    cf <- tibble::rownames_to_column(cf, "explanatory")
    cf$model <- label
    cf
  }
  rbind(.extract(TRUE, "full"), .extract(FALSE, "subset"))
}
formula_from_vsurp = function(predictors, dependent, vsurp_result) {
  # Build `dependent ~ v1+v2+...` from the predictors VSURF retained at its
  # interpretation step.
  selected = names(predictors[, vsurp_result$varselect.interp])
  rhs = paste(selected, collapse = "+")
  as.formula(paste0(dependent, "~", rhs))
}
plot_vsurp_result = function(result_table) {
  # Coefficient plot of the full-average model: one row per predictor, points
  # with standard-error bars, coloured by predictor type.
  full_results = type_labels(result_table[result_table$model == 'full', ])
  axis_scale = scale_y_discrete(
    limits = rev(levels(full_results$explanatory)),
    labels = explanatory_labels
  )
  ggplot(full_results, aes(y = explanatory, x = estimate, colour = type)) +
    geom_line() +
    geom_point() +
    geom_errorbar(
      aes(xmin = estimate - error, xmax = estimate + error),
      width = .2,
      position = position_dodge(0.05)
    ) +
    axis_scale +
    theme_bw() +
    geom_vline(xintercept = 0, linetype = "dotted") +
    guides(colour = guide_legend(title = "Predictor type")) +
    xlab('Increase from 0 (habitat filtering)\nto 1 (competitive exclusion)\n± Standard Error') +
    ylab('Predictor') +
    theme(legend.justification = "top")
}
norm_mntd_analysis_plot = geom_normalised_histogram(
'MNTD',
ggplot(analysis_data, aes(mntd_normalised))
)
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.
norm_mntd_analysis_plot
norm_mntd_analysis_geo_plot = geom_map(geom_sf(data = analysis_data, aes(color = mntd_normalised, geometry = geometry)), 'MNTD')
norm_mntd_analysis_geo_plot
norm_mntd_analysis_data = model_data(analysis_data[!is.na(analysis_data$mntd_normalised),], 'mntd_normalised')
norm_mntd_analysis_predictors = norm_mntd_analysis_data[,-1]
norm_mntd_analysis_interp = VSURF(x = norm_mntd_analysis_predictors, y = norm_mntd_analysis_data$mntd_normalised)
Thresholding step
Estimated computational time (on one core): 19.5 sec.
|
| | 0%
|
|======== | 5%
|
|=============== | 10%
|
|======================= | 15%
|
|============================== | 20%
|
|====================================== | 25%
|
|============================================== | 30%
|
|===================================================== | 35%
|
|============================================================= | 40%
|
|==================================================================== | 45%
|
|============================================================================ | 50%
|
|==================================================================================== | 55%
|
|=========================================================================================== | 60%
|
|=================================================================================================== | 65%
|
|========================================================================================================== | 70%
|
|================================================================================================================== | 75%
|
|========================================================================================================================== | 80%
|
|================================================================================================================================= | 85%
|
|========================================================================================================================================= | 90%
|
|================================================================================================================================================ | 95%
|
|========================================================================================================================================================| 100%
Interpretation step (on 30 variables)
Estimated computational time (on one core): between 7.8 sec. and 37.2 sec.
|
| | 0%
|
|===== | 3%
|
|========== | 7%
|
|=============== | 10%
|
|==================== | 13%
|
|========================= | 17%
|
|============================== | 20%
|
|=================================== | 23%
|
|========================================= | 27%
|
|============================================== | 30%
|
|=================================================== | 33%
|
|======================================================== | 37%
|
|============================================================= | 40%
|
|================================================================== | 43%
|
|======================================================================= | 47%
|
|============================================================================ | 50%
|
|================================================================================= | 53%
|
|====================================================================================== | 57%
|
|=========================================================================================== | 60%
|
|================================================================================================ | 63%
|
|===================================================================================================== | 67%
|
|========================================================================================================== | 70%
|
|=============================================================================================================== | 73%
|
|===================================================================================================================== | 77%
|
|========================================================================================================================== | 80%
|
|=============================================================================================================================== | 83%
|
|==================================================================================================================================== | 87%
|
|========================================================================================================================================= | 90%
|
|============================================================================================================================================== | 93%
|
|=================================================================================================================================================== | 97%
|
|========================================================================================================================================================| 100%
Prediction step (on 6 variables)
Maximum estimated computational time (on one core): 2.3 sec.
|
| | 0%
|
|========================= | 17%
|
|=================================================== | 33%
|
|============================================================================ | 50%
|
|===================================================================================================== | 67%
|
|=============================================================================================================================== | 83%
|
|========================================================================================================================================================| 100%
names(norm_mntd_analysis_predictors[,norm_mntd_analysis_interp$varselect.interp])
[1] "city_avg_monthly_temp" "core_realm" "city_avg_max_monthly_temp" "longitude" "city_avg_temp"
[6] "latitude"
norm_mntd_analysis_formula = formula_from_vsurp(norm_mntd_analysis_predictors, "mntd_normalised", norm_mntd_analysis_interp)
norm_mntd_analysis_result <- model_average(norm_mntd_analysis_formula, norm_mntd_analysis_data)
Fixed term is "(Intercept)"
norm_mntd_analysis_result_table = model_summary(norm_mntd_analysis_result)
norm_mntd_analysis_result_table
norm_mntd_analysis_pred_plot = plot_vsurp_result(norm_mntd_analysis_result_table)
norm_mntd_analysis_pred_plot
norm_gape_fdiv_analysis_plot = geom_normalised_histogram(
'Gape FDiv',
ggplot(analysis_data, aes(gape_width_fdiv_normalised))
)
norm_gape_fdiv_analysis_plot
norm_gape_fdiv_analysis_geo_plot = geom_map(geom_sf(data = analysis_data, aes(color = gape_width_fdiv_normalised, geometry = geometry)), 'Gape Width FDiv')
norm_gape_fdiv_analysis_geo_plot
norm_gape_fdiv_analysis_data = model_data(analysis_data[!is.na(analysis_data$gape_width_fdiv_normalised),], 'gape_width_fdiv_normalised')
norm_gape_fdiv_analysis_predictors = norm_gape_fdiv_analysis_data[,-1]
norm_gape_fdiv_analysis_interp = VSURF(x = norm_gape_fdiv_analysis_predictors, y = norm_gape_fdiv_analysis_data$gape_width_fdiv_normalised)
Thresholding step
Estimated computational time (on one core): 16.8 sec.
|
| | 0%
|
|======== | 5%
|
|=============== | 10%
|
|======================= | 15%
|
|============================== | 20%
|
|====================================== | 25%
|
|============================================== | 30%
|
|===================================================== | 35%
|
|============================================================= | 40%
|
|==================================================================== | 45%
|
|============================================================================ | 50%
|
|==================================================================================== | 55%
|
|=========================================================================================== | 60%
|
|=================================================================================================== | 65%
|
|========================================================================================================== | 70%
|
|================================================================================================================== | 75%
|
|========================================================================================================================== | 80%
|
|================================================================================================================================= | 85%
|
|========================================================================================================================================= | 90%
|
|================================================================================================================================================ | 95%
|
|========================================================================================================================================================| 100%
Interpretation step (on 30 variables)
Estimated computational time (on one core): between 8.4 sec. and 39 sec.
|
| | 0%
|
|===== | 3%
|
|========== | 7%
|
|=============== | 10%
|
|==================== | 13%
|
|========================= | 17%
|
|============================== | 20%
|
|=================================== | 23%
|
|========================================= | 27%
|
|============================================== | 30%
|
|=================================================== | 33%
|
|======================================================== | 37%
|
|============================================================= | 40%
|
|================================================================== | 43%
|
|======================================================================= | 47%
|
|============================================================================ | 50%
|
|================================================================================= | 53%
|
|====================================================================================== | 57%
|
|=========================================================================================== | 60%
|
|================================================================================================ | 63%
|
|===================================================================================================== | 67%
|
|========================================================================================================== | 70%
|
|=============================================================================================================== | 73%
|
|===================================================================================================================== | 77%
|
|========================================================================================================================== | 80%
|
|=============================================================================================================================== | 83%
|
|==================================================================================================================================== | 87%
|
|========================================================================================================================================= | 90%
|
|============================================================================================================================================== | 93%
|
|=================================================================================================================================================== | 97%
|
|========================================================================================================================================================| 100%
Prediction step (on 9 variables)
Maximum estimated computational time (on one core): 4 sec.
|
| | 0%
|
|================= | 11%
|
|================================== | 22%
|
|=================================================== | 33%
|
|==================================================================== | 44%
|
|==================================================================================== | 56%
|
|===================================================================================================== | 67%
|
|====================================================================================================================== | 78%
|
|======================================================================================================================================= | 89%
|
|========================================================================================================================================================| 100%
# Inspect the predictor names retained by the VSURF interpretation step.
names(norm_gape_fdiv_analysis_predictors[,norm_gape_fdiv_analysis_interp$varselect.interp])
[1] "longitude" "core_realm" "abs_latitude" "city_avg_temp"
[5] "city_avg_max_monthly_temp" "city_avg_min_monthly_temp" "city_avg_max_monthly_rainfall" "latitude"
[9] "city_max_elev"
# Fit and average candidate models on the selected predictors for gape FDiv,
# then tabulate and plot the averaged coefficients.
norm_gape_fdiv_analysis_formula = formula_from_vsurp(norm_gape_fdiv_analysis_predictors, "gape_width_fdiv_normalised", norm_gape_fdiv_analysis_interp)
norm_gape_fdiv_analysis_result <- model_average(norm_gape_fdiv_analysis_formula, norm_gape_fdiv_analysis_data)
Fixed term is "(Intercept)"
norm_gape_fdiv_analysis_result_table = model_summary(norm_gape_fdiv_analysis_result)
norm_gape_fdiv_analysis_result_table
norm_gape_fdiv_analysis_pred_plot = plot_vsurp_result(norm_gape_fdiv_analysis_result_table)
norm_gape_fdiv_analysis_pred_plot
# Hand-wing index FDiv (normalised): distribution histogram ----
# NOTE(review): the "_loco_" infix in this variable name looks like a
# copy-paste leftover from a locomotory-trait section — the parallel gape and
# mntd sections omit it. Harmless, but consider renaming for consistency.
norm_hwi_fdiv_loco_analysis_plot = geom_normalised_histogram(
'HWI FDiv',
ggplot(analysis_data, aes(handwing_index_fdiv_normalised))
)
norm_hwi_fdiv_loco_analysis_plot
# Geographic map of HWI FDiv per city.
norm_hwi_fdiv_analysis_geo_plot = geom_map(geom_sf(data = analysis_data, aes(color = handwing_index_fdiv_normalised, geometry = geometry)), 'HWI FDiv')
norm_hwi_fdiv_analysis_geo_plot
# Remove missing responses, split response (col 1) from predictors, and run
# VSURF variable selection for the HWI FDiv response.
norm_hwi_fdiv_analysis_data = model_data(analysis_data[!is.na(analysis_data$handwing_index_fdiv_normalised),], 'handwing_index_fdiv_normalised')
norm_hwi_fdiv_analysis_predictors = norm_hwi_fdiv_analysis_data[,-1]
norm_hwi_fdiv_analysis_interp = VSURF(x = norm_hwi_fdiv_analysis_predictors, y = norm_hwi_fdiv_analysis_data$handwing_index_fdiv_normalised)
Thresholding step
Estimated computational time (on one core): 15.3 sec.
|
| | 0%
|
|======== | 5%
|
|=============== | 10%
|
|======================= | 15%
|
|============================== | 20%
|
|====================================== | 25%
|
|============================================== | 30%
|
|===================================================== | 35%
|
|============================================================= | 40%
|
|==================================================================== | 45%
|
|============================================================================ | 50%
|
|==================================================================================== | 55%
|
|=========================================================================================== | 60%
|
|=================================================================================================== | 65%
|
|========================================================================================================== | 70%
|
|================================================================================================================== | 75%
|
|========================================================================================================================== | 80%
|
|================================================================================================================================= | 85%
|
|========================================================================================================================================= | 90%
|
|================================================================================================================================================ | 95%
|
|========================================================================================================================================================| 100%
Interpretation step (on 30 variables)
Estimated computational time (on one core): between 6.6 sec. and 36.3 sec.
|
| | 0%
|
|===== | 3%
|
|========== | 7%
|
|=============== | 10%
|
|==================== | 13%
|
|========================= | 17%
|
|============================== | 20%
|
|=================================== | 23%
|
|========================================= | 27%
|
|============================================== | 30%
|
|=================================================== | 33%
|
|======================================================== | 37%
|
|============================================================= | 40%
|
|================================================================== | 43%
|
|======================================================================= | 47%
|
|============================================================================ | 50%
|
|================================================================================= | 53%
|
|====================================================================================== | 57%
|
|=========================================================================================== | 60%
|
|================================================================================================ | 63%
|
|===================================================================================================== | 67%
|
|========================================================================================================== | 70%
|
|=============================================================================================================== | 73%
|
|===================================================================================================================== | 77%
|
|========================================================================================================================== | 80%
|
|=============================================================================================================================== | 83%
|
|==================================================================================================================================== | 87%
|
|========================================================================================================================================= | 90%
|
|============================================================================================================================================== | 93%
|
|=================================================================================================================================================== | 97%
|
|========================================================================================================================================================| 100%
Prediction step (on 7 variables)
Maximum estimated computational time (on one core): 2.3 sec.
|
| | 0%
|
|====================== | 14%
|
|=========================================== | 29%
|
|================================================================= | 43%
|
|======================================================================================= | 57%
|
|============================================================================================================= | 71%
|
|================================================================================================================================== | 86%
|
|========================================================================================================================================================| 100%
# Predictors retained by VSURF's interpretation step for HWI FDiv.
names(norm_hwi_fdiv_analysis_predictors[,norm_hwi_fdiv_analysis_interp$varselect.interp])
[1] "latitude" "core_realm" "city_avg_max_monthly_temp" "city_avg_temp"
[5] "longitude" "city_avg_max_monthly_rainfall" "region_50km_min_elev"
# Fit and average candidate models on the selected predictors, then tabulate
# and plot the averaged coefficients for HWI FDiv.
norm_hwi_fdiv_analysis_formula = formula_from_vsurp(norm_hwi_fdiv_analysis_predictors, "handwing_index_fdiv_normalised", norm_hwi_fdiv_analysis_interp)
norm_hwi_fdiv_analysis_result <- model_average(norm_hwi_fdiv_analysis_formula, norm_hwi_fdiv_analysis_data)
Fixed term is "(Intercept)"
norm_hwi_fdiv_analysis_result_table = model_summary(norm_hwi_fdiv_analysis_result)
norm_hwi_fdiv_analysis_result_table
norm_hwi_fdiv_analysis_pred_plot = plot_vsurp_result(norm_hwi_fdiv_analysis_result_table)
norm_hwi_fdiv_analysis_pred_plot
# Body mass FDiv (normalised): distribution histogram ----
# NOTE(review): "_loco_" in this name is the same copy-paste leftover flagged
# for the HWI histogram variable; the parallel sections omit it.
norm_mass_fdiv_loco_analysis_plot = geom_normalised_histogram(
'Mass FDiv',
ggplot(analysis_data, aes(mass_fdiv_normalised))
)
norm_mass_fdiv_loco_analysis_plot
# Geographic map of mass FDiv per city.
norm_mass_fdiv_analysis_geo_plot = geom_map(geom_sf(data = analysis_data, aes(color = mass_fdiv_normalised, geometry = geometry)), 'Mass FDiv')
norm_mass_fdiv_analysis_geo_plot
# Remove missing responses, split response (col 1) from predictors, and run
# VSURF variable selection for the mass FDiv response.
norm_mass_fdiv_analysis_data = model_data(analysis_data[!is.na(analysis_data$mass_fdiv_normalised),], 'mass_fdiv_normalised')
norm_mass_fdiv_analysis_predictors = norm_mass_fdiv_analysis_data[,-1]
norm_mass_fdiv_analysis_interp = VSURF(x = norm_mass_fdiv_analysis_predictors, y = norm_mass_fdiv_analysis_data$mass_fdiv_normalised)
Thresholding step
Estimated computational time (on one core): 16.6 sec.
|
| | 0%
|
|======== | 5%
|
|=============== | 10%
|
|======================= | 15%
|
|============================== | 20%
|
|====================================== | 25%
|
|============================================== | 30%
|
|===================================================== | 35%
|
|============================================================= | 40%
|
|==================================================================== | 45%
|
|============================================================================ | 50%
|
|==================================================================================== | 55%
|
|=========================================================================================== | 60%
|
|=================================================================================================== | 65%
|
|========================================================================================================== | 70%
|
|================================================================================================================== | 75%
|
|========================================================================================================================== | 80%
|
|================================================================================================================================= | 85%
|
|========================================================================================================================================= | 90%
|
|================================================================================================================================================ | 95%
|
|========================================================================================================================================================| 100%
Interpretation step (on 30 variables)
Estimated computational time (on one core): between 1.8 sec. and 34.5 sec.
|
| | 0%
|
|===== | 3%
|
|========== | 7%
|
|=============== | 10%
|
|==================== | 13%
|
|========================= | 17%
|
|============================== | 20%
|
|=================================== | 23%
|
|========================================= | 27%
|
|============================================== | 30%
|
|=================================================== | 33%
|
|======================================================== | 37%
|
|============================================================= | 40%
|
|================================================================== | 43%
|
|======================================================================= | 47%
|
|============================================================================ | 50%
|
|================================================================================= | 53%
|
|====================================================================================== | 57%
|
|=========================================================================================== | 60%
|
|================================================================================================ | 63%
|
|===================================================================================================== | 67%
|
|========================================================================================================== | 70%
|
|=============================================================================================================== | 73%
|
|===================================================================================================================== | 77%
|
|========================================================================================================================== | 80%
|
|=============================================================================================================================== | 83%
|
|==================================================================================================================================== | 87%
|
|========================================================================================================================================= | 90%
|
|============================================================================================================================================== | 93%
|
|=================================================================================================================================================== | 97%
|
|========================================================================================================================================================| 100%
Prediction step (on 5 variables)
Maximum estimated computational time (on one core): 0.9 sec.
|
| | 0%
|
|============================== | 20%
|
|============================================================= | 40%
|
|=========================================================================================== | 60%
|
|========================================================================================================================== | 80%
|
|========================================================================================================================================================| 100%
# Predictors retained by VSURF's interpretation step for mass FDiv.
names(norm_mass_fdiv_analysis_predictors[,norm_mass_fdiv_analysis_interp$varselect.interp])
[1] "core_realm" "abs_latitude" "city_avg_temp" "latitude"
[5] "city_avg_max_monthly_rainfall"
# Fit and average candidate models on the selected predictors, then tabulate
# and plot the averaged coefficients for mass FDiv.
norm_mass_fdiv_analysis_formula = formula_from_vsurp(norm_mass_fdiv_analysis_predictors, "mass_fdiv_normalised", norm_mass_fdiv_analysis_interp)
norm_mass_fdiv_analysis_result <- model_average(norm_mass_fdiv_analysis_formula, norm_mass_fdiv_analysis_data)
Fixed term is "(Intercept)"
norm_mass_fdiv_analysis_result_table = model_summary(norm_mass_fdiv_analysis_result)
norm_mass_fdiv_analysis_result_table
norm_mass_fdiv_analysis_pred_plot = plot_vsurp_result(norm_mass_fdiv_analysis_result_table)
norm_mass_fdiv_analysis_pred_plot
# Extract shared legends (cowplot::get_legend) so the composite figure can
# show one legend per column instead of repeating it in every panel.
# The HWI effect plot stands in for all effect-plot legends; the mass map
# stands in for all map legends.
pred_legend <- get_legend(
# create some space to the left of the legend
norm_hwi_fdiv_analysis_pred_plot + theme(legend.box.margin = margin(0, 0, 0, 12))
)
Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?Warning: Removed 1 row containing missing values or values outside the scale range (`geom_point()`).Warning: Multiple components found; returning the first one. To return all, use `return_all = TRUE`.
geo_legend <- get_legend(
# create some space to the left of the legend
norm_mass_fdiv_analysis_geo_plot + theme(legend.box.margin = margin(0, 0, 0, 12))
)
Warning: Multiple components found; returning the first one. To return all, use `return_all = TRUE`.
Warning: Multiple components found; returning the first one. To return all, use `return_all = TRUE`.
# Composite figure: four rows (MNTD, Gape, HWI, Mass), each pairing the
# geographic map with the predictor-effect plot, plus a fifth row holding
# the shared map and effect-plot legends. Panel legends are suppressed and
# each row is titled with a rotated label on its left edge.
plot_grid(
plot_grid(
norm_mntd_analysis_geo_plot + theme(legend.position="none"),
norm_mntd_analysis_pred_plot + theme(legend.position="none"),
nrow = 1
) + draw_label("MNTD", size = 16, angle = 90, x = 0.01, y = 0.5),
plot_grid(
norm_gape_fdiv_analysis_geo_plot + theme(legend.position="none"),
norm_gape_fdiv_analysis_pred_plot + theme(legend.position="none"),
nrow = 1
) + draw_label("Gape", size = 16, angle = 90, x = 0.01, y = 0.5),
plot_grid(
norm_hwi_fdiv_analysis_geo_plot + theme(legend.position="none"),
norm_hwi_fdiv_analysis_pred_plot + theme(legend.position="none"),
nrow = 1
) + draw_label("HWI", size = 16, angle = 90, x = 0.01, y = 0.5),
plot_grid(
norm_mass_fdiv_analysis_geo_plot + theme(legend.position="none"),
norm_mass_fdiv_analysis_pred_plot + theme(legend.position="none"),
nrow = 1
) + draw_label("Mass", size = 16, angle = 90, x = 0.01, y = 0.5),
plot_grid(
geo_legend,
pred_legend,
nrow = 1
),
nrow = 5
)
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?Warning: Removed 1 row containing missing values or values outside the scale range (`geom_point()`).Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?Warning: Removed 1 row containing missing values or values outside the scale range (`geom_point()`).
# Save the last displayed plot (the composite above) at fixed pixel size.
ggsave(filename(FIGURES_OUTPUT_DIR, 'process_response.jpg'), width = 2500, height=5000, units = 'px')
# Exploratory scatter plots: pairwise relationships between normalised MNTD,
# gape width FDiv, and HWI FDiv, coloured by biogeographic realm.
ggplot(analysis_data, aes(x = gape_width_fdiv_normalised, y = mntd_normalised, colour = core_realm)) +
geom_point() +
ylab("MNTD") +
xlab("Gape Width FDiv") +
theme_bw() + labs(color = "Realm")
ggplot(analysis_data, aes(x = handwing_index_fdiv_normalised, y = mntd_normalised, colour = core_realm)) +
geom_point() +
ylab("MNTD") +
xlab("HWI FDiv") +
theme_bw() + labs(color = "Realm")
ggplot(analysis_data, aes(x = handwing_index_fdiv_normalised, y = gape_width_fdiv_normalised, colour = core_realm)) +
geom_point() +
ylab("Gape Width FDiv") +
xlab("HWI FDiv") +
theme_bw() + labs(color = "Realm")
# Join the key community metrics with pool-size summaries and derive the
# urban pool as a percentage of the regional pool.
# NOTE(review): left_join() is called without an explicit `by`; it natural-
# joined on city_id here (see console message) — consider join_by(city_id)
# to make the key explicit and robust to schema changes.
mntd_fdiv_analysis = analysis_data %>%
dplyr::select(city_id, mntd_normalised, handwing_index_fdiv_normalised, gape_width_fdiv_normalised) %>%
left_join(community_summary) %>%
mutate(urban_pool_perc = urban_pool_size * 100 / regional_pool_size)
Joining with `by = join_by(city_id)`
mntd_fdiv_analysis
# Pairwise correlation matrix (GGally::ggpairs) of the metrics and pool
# sizes, saved for the appendix.
ggpairs(mntd_fdiv_analysis %>% dplyr::select(mntd_normalised, handwing_index_fdiv_normalised, gape_width_fdiv_normalised, regional_pool_size, urban_pool_size, urban_pool_perc))
ggsave(filename(FIGURES_OUTPUT_DIR, 'appendix_normalised_correlation.jpg'))
Saving 7.29 x 4.51 in image